In [1]:
import pandas as pd

In [2]:
import targetsmosh as tm

In [29]:
# Load the two input tables from local pickles.
# ghgfins supplies the emissions/financial columns that the join in the next
# cell keeps; targets supplies the "has absolute" / "has intensity" /
# "target type" columns.
# NOTE: unpickling can execute arbitrary code; only load files you produced
# yourself or otherwise trust.
ghgfins = pd.read_pickle("../CDPdata/s12_ghgfins.pkl")
targets = pd.read_pickle("../CDPdata/targets_all.pkl")

In [30]:
# Re-key the targets table on (Organisation, year) and attach only the
# target columns to the emissions/financials table. DataFrame.join is a
# left join by default, so every row of ghgfins is kept.
target_cols = ["has absolute", "has intensity", "target type"]
targets = targets.reset_index().set_index(["Organisation", "year"])
ghgfintars = ghgfins.join(targets[target_cols])

In [31]:
# Some company-years have no matching row in the targets table, so the joined
# flags are NaN there; treat "no target information" as "no target".
# The filled column is assigned back instead of calling
# fillna(..., inplace=True) on ghgfintars[...]: the in-place form operates on
# a selected column and does not write through to the frame under pandas
# copy-on-write.
ghgfintars["has absolute"] = ghgfintars["has absolute"].fillna(False)
ghgfintars["has intensity"] = ghgfintars["has intensity"].fillna(False)

In [32]:
# TODO: compare first by GICS Industry Group.
# Persist the joined table (emissions, financials and target flags) so later
# sessions can reload it, then preview the first rows.
ghgfintars.to_pickle("../CDPdata/ghgfintars.pkl")
ghgfintars.head()


Out[32]:
Country GICS Industry GICS Sector scope1 scope2 Revenues COGS Equity PPE Assets Income 1and2 total 1and2 intensity has absolute has intensity target type
Organisation year
3M Company 2009 USA Industrial Conglomerates Industrials 3290000 1690000 24939.230138 11709.779078 13766.567205 7549.825324 29390.391440 3443.798894 4980000 199.685394 False False NaN
2010 USA Industrial Conglomerates Industrials 4300000 1950000 28294.483474 13489.279853 16622.027404 7724.684765 32002.417059 4335.119833 6250000 220.891115 True False Absolute emissions reduction
2011 USA Industrial Conglomerates Industrials 4060000 2030000 30530.950861 14906.148276 15899.066640 7904.166334 32598.241952 4416.063711 6090000 199.469713 False True Intensity target
2012 USA Industrial Conglomerates Industrials 4540000 2230000 30349.027827 14611.254468 17836.549092 8502.680415 34380.138666 4510.135088 6770000 223.071396 False True Intensity target
2013 USA Industrial Conglomerates Industrials 4550000 2230000 30843.417282 14721.834526 17486.362258 8644.269584 33520.023641 4654.837262 6780000 219.820000 False False No

In [33]:
# Sanity check: the join should leave the company count the same as for ghgfins.
# Only the last expression of a cell is displayed, so the company count is
# printed explicitly instead of being computed and silently discarded.
# Level 0 of the (Organisation, year) index holds the distinct organisations.
# NOTE: index levels can retain organisations whose rows were filtered out
# upstream -- confirm if the count looks too high.
print(len(ghgfintars.index.levels[0]))  # 1243 instead of 1247 requested from COMPUSTAT
len(ghgfintars)  # 4673 company-year rows


Out[33]:
4673

plot results


In [5]:
# Plotting setup: numeric/stdlib helpers, the bokeh chart types used below,
# and the project-local plotting helpers.
import numpy as np
from collections import OrderedDict
from bokeh.charts import Scatter, Histogram
from bokeh.plotting import output_notebook, show
output_notebook()  # send bokeh output to the notebook instead of a browser tab
import datavis as dv


BokehJS successfully loaded.

In [270]:
# Development aid: re-execute datavis.py so edits made since the first import
# take effect without restarting the kernel (`reload` is the Python 2 builtin).
reload(dv)


Out[270]:
<module 'datavis' from 'datavis.py'>

In [150]:
# Keep only the columns needed for the comparison, drop the rows that have no
# intensity change, and group what is left by the 'often cares' flag.
compareintensity = (
    forcomparing[['year', 'percent change 1and2 intensity', 'often cares']]
    .dropna(subset=['percent change 1and2 intensity'])
    .groupby('often cares')
)

In [273]:
# Hand the 'often cares' groupby to the datavis helper; the result is later
# passed (via .values()) to dv.scatter_groups.
# NOTE(review): presumably a mapping of group -> x/y series; confirm in datavis.prep_groups.
xycomint = dv.prep_groups(compareintensity)

In [269]:
# Same preparation step for the `compareby` grouping.
# NOTE(review): `compareby` is not defined in any cell visible here; confirm it
# is created before this cell, otherwise Restart & Run All fails with NameError.
xycomby = dv.prep_groups(compareby)

In [14]:
# Scatter plot of the `xycomby` groups, written to foo.html and then shown.
# NOTE(review): the positional arguments are presumably
# (groups, output file, title, x label, y label); confirm in datavis.scatter_groups.
scatter = dv.scatter_groups(xycomby, "foo.html", "industry vs. change", "year", "percent change")
scatter.show()


Wrote foo.html

In [275]:
# Scatter plot of intensity change for the 'often cares' groups, written to an
# HTML file and then shown.
# NOTE(review): this call passes xycomint.values() while the earlier foo.html
# call passes the prep_groups result directly; confirm which form
# datavis.scatter_groups expects.
scatter = dv.scatter_groups(xycomint.values(), "intensity v has_target 3_13.html", "% Intensity change vs. Has Target", "year", "percent change")
scatter.show()


Wrote intensity v has_target 3_13.html

histograms by target setting for a given year


In [96]:
# Development aid: re-execute targetsmosh.py so edits made since the first
# import take effect without restarting the kernel (`reload` is the Python 2 builtin).
reload(tm)


Out[96]:
<module 'targetsmosh' from 'targetsmosh.py'>

In [37]:
# "has target" marks company-years with an absolute and/or an intensity target.
# The two flags are added and the sum is capped at 1, so a row with both kinds
# of target is not counted twice; any remaining NaN becomes 0.
# `lambda x:` replaces the Python-2-only `lambda(x):` tuple-parameter form,
# which is a SyntaxError on Python 3. The result is assigned in one step
# instead of using a chained fillna(inplace=True), which is not guaranteed to
# write through to the frame.
ghgfintars["has target"] = (
    (ghgfintars["has absolute"] + ghgfintars["has intensity"])
    .apply(lambda x: min(x, 1))
    .fillna(0)
)

In [38]:
# Determine whether there was a target in the previous year.
# NOTE(review): presumably this adds "... last year" flag columns such as the
# "had intensity last year" column grouped on further down; confirm in
# targetsmosh.get_hadtarget. The frame is rebound to the same name, so check
# that re-running this cell is harmless.
ghgfintars = tm.get_hadtarget(ghgfintars)

In [22]:
# For each year 2010-2013, group that year's rows by the "has intensity" flag.
gft_hi = {
    yr: gft_year.loc[yr].groupby("has intensity")
    for yr in range(2010, 2014)
}

In [23]:
# Per year, collect the intensity changes of the companies without
# ("No target") and with ("Had target") an intensity target as plain lists.
hi_values = {
    yr: {
        "No target": gft_hi[yr].get_group(False)["percent change 1and2 intensity"].tolist(),
        "Had target": gft_hi[yr].get_group(True)["percent change 1and2 intensity"].tolist(),
    }
    for yr in range(2010, 2014)
}

In [41]:
# All years pooled: split the prepared rows by the "has intensity" flag and
# collect each group's intensity changes as a plain list.
# NOTE(review): prep_forhist is called unqualified but is not defined or
# imported in the cells visible here; confirm where it comes from. The
# -.5 / .5 arguments are presumably bounds on the change column; verify.
change_col = "percent change 1and2 intensity"
gftall = prep_forhist(ghgfintars, change_col, -.5, .5)
gft_hiall = gftall.groupby("has intensity")
hiall_values = {
    label: gft_hiall.get_group(flag)[change_col].tolist()
    for label, flag in (("No target", False), ("Had target", True))
}

In [51]:
# Split each year's rows by whether the company had an intensity target the
# year before ("had intensity last year").
gft_gs = {
    yr: gft_year.loc[yr].groupby("had intensity last year")
    for yr in range(2010, 2014)
}
# Intensity changes per group; values are collected for 2011-2013 only, even
# though the groups above also cover 2010.
ht_values = {
    yr: {
        "No target": gft_gs[yr].get_group(False)["percent change 1and2 intensity"].tolist(),
        "Had target": gft_gs[yr].get_group(True)["percent change 1and2 intensity"].tolist(),
    }
    for yr in range(2011, 2014)
}

In [107]:
# Second name for the "had intensity last year" values. This is an alias, not
# a copy: both names refer to the same dict object.
hily_values = ht_values

In [56]:
# Histogram of percent intensity change for a single year, split by whether
# the company had an intensity target the previous year.
# The year is set in one place so the data plotted, the title and the output
# file name cannot disagree. Previously the labels said 2013 while
# ht_values[2011] was plotted.
hist_year = 2013  # ht_values holds the years 2011-2013
fname = "{0} had inttarget vs int change.html".format(hist_year)
title = "Effect of Intensity Target on % Intensity Change {0}".format(hist_year)
hist = Histogram(ht_values[hist_year], bins=50, filename=fname, title=title, legend=True)

In [57]:
# Display the histogram chart with bokeh.plotting.show.
show(hist)



In [ ]: